home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
Language/OS - Multiplatform Resource Library
/
LANGUAGE OS.iso
/
dsp
/
fft
/
fft_eyal.lha
/
fft_eyal
/
fftg.c
< prev
next >
Wrap
C/C++ Source or Header
|
1991-08-31
|
12KB
|
362 lines
/* ------------------------------ fftg.c ------------------------------------ */
/* */
/* Author: Eyal Lebedinsky */
/* Date: May 1990 */
/* Version: 9 June 1991 */
/* */
/* Program generator for integer DFFT. This program was developed for a very */
/* specific need and no attempt was made to make it general. Still, the basic */
/* algorithm is a standard one from the literature. For maximum performance */
/* of the integer calculation a special mechanism was employed to track shift */
/* operations that could be delayed. Coefficients are kept as 16 bits with 1 */
/* sign bit, 1 integer bit and 14 fractional bits. So a typicall multiply */
/* will do 16bit*16bits=32bits, then shift down one bit and use the HIGH 16 */
/* bits as the result. Before adding two numbers they must be scaled down one */
/* bit to avoid overflow. Adding more numbers calls for more scaling. The */
/* delayed scaling allows the intermediate array to hold numbers that are */
/* scaled to a varying degree until a clash occurs. Well, this space is too */
/* short to elaborate any further and if you did similar kind of work then */
/* you should know what I am talking about. */
/* */
/* usage: fftgc File Size EpName */
/* File - output file name. */
/* Size - sample order: 8 for 256 points, 10 for 1024... */
/* EpName - name of the generated function ('_' added) */
/* Now assemble the output file and then use as: */
/* */
/* #define M 8 [or whatever size] */
/* #define N (1 << M) */
/* short x[N]; [input/output array] */
/* short qf[(N/2)+1]; [power spectrum] */
/* extern void far fft () */
/* */
/* [then call it as: fft();] */
/* */
/* This is a modified version of Sorensen et al, IEEE ASSP-35 no 6. */
/* */
/* - uses 3+3 complex mult. */
/* - no pre scrambling. descrambling done when power spectrum calculated. */
/* - moved to 0-origin as proper for c. */
/* - trig values in table. */
/* - the case for sin==cos treated separately. */
/* - uses delayed scaling down. */
/* - does length 256 fft with 642 mults. */
/* */
/* */
/* This program is released into the public domain. */
/* */
/*----------------------------------------------------------------------------*/
#include <stdio.h>
#include <math.h>
#define MAXPOINTS 1024
#define SC15 32768 /* scaling factor 2^15 */
/***
About SC15:
Fractions are kept as 16 bit quantities with one sign bit, no integer
bits and 15 fractional bits. SC15 is used to convert a floating point
number (in the range -1 .. 1) into such an integer.
There is no integer part because all fractions are really kept already
scaled down by a factor of 2. This is a result of the fact that all
fractions are constants (sin() and cos() values) and whenever we
multiply a point by a fraction we process to scale the result down by
one bit anywat - so the reduce constants save us some processing time.
***/
static short mm[MAXPOINTS+1]; /* digit reverse indices */
/***
About mm[]:
The values in the sample points array x[] are not scramled at any
stage. Instead, the array mm[] indicated where a point should be
if we DID do the scrambling. All indices into x[] use mm[] and
this is how the power spectrum extracts the data. This means that
you CANNOT use the x[] array after calling fft() because the points
are not in place. If you want to change the output of fft() then just
modify the last part of this program (the power spectrum logic) to
calculate whatever you need.
***/
static int cntx[9]; /* stats counters */
/***
About cntx[]:
This array gathers statistics:
0 memory loads
1 memory stores
2 register shifts
3 adds
4 mults
5 memory shifts due to smad() correction
***/
static char *file_name, *ep_name;
extern void start_fft (), end_fft ();
extern void fft1 (), /* scaling */
fft2 (), fft3 (), fft4 (), fft5 (), /* SRFFT */
fft7 (), fft8 (); /* power spectrum */
/***
about ad[], mad and smad():
Because we do integer arithmetic, adding two numbers can overflow the
integer magnitude. For this reason the integers are scaled down (by one
bit) before the addition. Sometimes we add more that two numbers and it
is necessary to scale further.
The FFT algorithm does not use all of the points in the same way in one
pass, which means that some data needs to be scaled and some does not
need to. Actualy, in the first pass some points are not used at all.
This means that SOME of the points would have been scaled down. If we
add (in a later stage) such a point with an un-scaled point the result
is invalid. For this purpose the array ad[] keeps track of how far each
point was scaled, smad() checks that a point is properly scaled and mad
is the needed scaling at this stage. As the FFT progresses, some points
are kept at a different scaling untill they are used in an expression
with other points. If smad() finds a point out of adjustment the it
issues a scaling instruction using the fft1() function.
The program was adjusted to optimize the scaling (the various
fft2() ... fft5() have internal scaling rules) and you may notice that
smad() hardly ever needs to do any correction (usually there is only one
final shift). This makes the whole ad[] almost redundant but it was left
in for future use.
If your smaple points are less that 16 bits then you still need to keep
scaling you data. You need to have enough spare bits to avoid the
scaling, and the number depends on the size of the sample - the larger
the sample the more stages the FFT has and the more spare bits you
need.
***/
static short ad[MAXPOINTS+1]; /* how far each point is adjusted */
static int mad; /* maximum adjust at this level */
static void
smad (i, j) /* set proper adjust */
int i, j;
{
while (ad[i] < (mad-j)) {
fprintf (stderr, "x[%u] >>= %d (%d-%d)\n", i, 1, mad, j);
fft1 (mm[i]);
++ad[i];
cntx[0] += 1;
cntx[1] += 1;
cntx[2] += 1;
cntx[5] += 1;
}
}
main (argc, argv)
int argc;
char *argv[];
{
int n, m, j, i, k, is, id, n1, n2, n4, n8, ind,
i0, i1, i2, i3, i4, i5, i6, i7, i8,
cc1, ss1, cc3, ss3;
float e, a, a3;
/* ---------- Handle arguments ------------------------------------------- */
if (argc < 4) {
printf ("usage: fftg File PowerOf2 EpName [stderrFile]\n");
exit (0);
}
file_name = argv[1];
sscanf (argv[2], "%u", &m);
ep_name = argv[3];
if (argc > 4)
freopen (argv[4], "wt", stderr);
/* ---------- Initialise ------------------------------------------------ */
start_fft (file_name, ep_name);
n = 1 << m;
for (i = 0; i < 9; ++i)
cntx[i] = 0;
mad = 0;
for (i = 1; i <= n; ++i)
ad[i] = 0;
/* ---------- Digit reverse counter -------------------------------------- */
for (i = 1; i <= n; ++i) {
mm[i] = i-1; /* '-1' for shifting to 0-origin */
}
j = 1;
n1 = n - 1;
for (i = 1; i <= n1; ++i) {
if (i < j) {
mm[i] = j-1; /* '-1' for shifting to 0-origin */
mm[j] = i-1; /* '-1' for shifting to 0-origin */
}
for (k = n / 2; k < j; k /= 2)
j -= k;
j += k;
}
/* ---------- Length two butterflies ------------------------------------- */
for (is = 1, id = 4; is < n; is = 2 * id - 1, id = 4 * id) {
for (i0 = is; i0 <= n; i0 += id) {
i1 = i0 + 1;
smad (i0, 0);
smad (i1, 0);
fft2 (mm[i0], mm[i1]);
++ad[i0];
++ad[i1];
cntx[0] += 2;
cntx[1] += 2;
cntx[2] += 2;
cntx[3] += 2;
}
}
/* ---------- L shaped butterflies --------------------------------------- */
n2 = 2;
for (k = 2; k <= m; ++k) {
++mad;
n2 = n2 * 2;
n4 = n2 / 4;
n8 = n2 / 8;
e = 6.2831853071719586 / n2; /* 2 * pi */
cc1 = (SC15/2) * 0.7071067811865475; /* sqrt (0.5) */
for (is = 0, id = n2 * 2; is < n; is = 2 * id - n2, id *= 4) {
for (i = is; i <= n - 1; i += id) {
i1 = i + 1;
i2 = i1 + n4;
i3 = i2 + n4;
i4 = i3 + n4;
smad (i1, 0);
smad (i3, 1);
smad (i4, 1);
fft3 (mm[i1], mm[i3], mm[i4]);
ad[i1] += 1;
ad[i3] += 2;
ad[i4] += 2;
cntx[0] += 3;
cntx[1] += 3;
cntx[2] += 5;
cntx[3] += 4;
if (n4 != 1) {
i1 = i1 + n8;
i2 = i2 + n8;
i3 = i3 + n8;
i4 = i4 + n8;
smad (i1, 1);
smad (i2, 0);
smad (i3, 1);
smad (i4, 1);
fft4 (mm[i1], mm[i2], mm[i3], mm[i4],
cc1);
ad[i1] += 2;
ad[i2] += 1;
ad[i3] += 2;
ad[i4] += 2;
cntx[0] += 4;
cntx[1] += 4;
cntx[2] += 2;
cntx[3] += 6;
cntx[4] += 2;
}
}
}
for (j = 2, a = e; j <= n8; ++j, a += e) {
a3 = 3 * a;
cc1 = (SC15/2) * cos (a);
ss1 = (SC15/2) * sin (a);
cc3 = (SC15/2) * cos (a3);
ss3 = (SC15/2) * sin (a3);
for (is = 0, id = n2 * 2; is < n;
is = 2 * id - n2, id *= 4) {
for (i = is; i <= n - 1; i += id) {
i1 = i + j;
i2 = i1 + n4;
i3 = i2 + n4;
i4 = i3 + n4;
i5 = i + n4 - j + 2;
i6 = i5 + n4;
i7 = i6 + n4;
i8 = i7 + n4;
smad (i1, 0);
smad (i2, 0);
smad (i3, 2);
smad (i4, 2);
smad (i5, 0);
smad (i6, 0);
smad (i7, 1);
smad (i8, 1);
if (ind = (ad[i3] < (mad-1))) {
++ad[i3];
++ad[i4];
cntx[2] += 1;
}
fft5 (mm[i1], mm[i2], mm[i3], mm[i4],
mm[i5], mm[i6], mm[i7], mm[i8],
ss1-cc1, -ss1-cc1, cc1,
ss3-cc3, -ss3-cc3, cc3, ind);
ad[i1] += 1;
ad[i2] += 1;
ad[i3] += 2;
ad[i4] += 2;
ad[i5] += 1;
ad[i6] += 1;
ad[i7] += 2;
ad[i8] += 2;
cntx[0] += 8;
cntx[1] += 8;
cntx[2] += 4;
cntx[3] += 18;
cntx[4] += 6;
}
}
}
}
/* ---------- force final adjustment ------------------------------------- */
++mad;
for (i = 1; i <= n; ++i)
smad (i, 0);
/* ---------- Power spectrum (no sqrt) ----------------------------------- */
fft7 (0, mm[1]);
for (i = 1; i < n/2; ++i)
fft8 (i, mm[i+1], mm[n-i+1]);
fft7 ((n/2), mm[n/2+1]);
/* ---------- end and show stats ----------------------------------------- */
end_fft (file_name, ep_name);
for (i = 0; i < 9; ++i)
fprintf (stderr, " %5u", i);
fprintf (stderr, "\n Load Store Shift Add Mult Adj\n");
for (i = 0; i < 9; ++i)
fprintf (stderr, " %5u", cntx[i]);
fprintf (stderr, "\n");
exit (0);
}